In this R Markdown document we will display and document each step of CS329e R Project 5.
Project Requirements http://www.cs.utexas.edu/~cannata/dataVis/Projects/Tableau%20Project%202/R%20Project%20Requirements.html.
The dataset for this project pertains to airline on-time performance for airports throughout United States. To reduce the sheer volume of records, we choose to focus the analysis on data only from JFK. utilizing Tableau to created visualization of the relationships we uncovered.
Distance vs Average Delay
Avg(Delay) vs Age of Aircraft
Age of Aircraft vs Delay
Carrier vs Avg(Delay)
Crosstabs rank()
source("../01 SQL Crosstabs/Rank.R", echo = TRUE)
##
## > require("RCurl")
## Loading required package: RCurl
## Loading required package: bitops
##
## > require("jsonlite")
## Loading required package: jsonlite
##
## Attaching package: 'jsonlite'
##
## The following object is masked from 'package:utils':
##
## View
##
## > require("dplyr")
## Loading required package: dplyr
##
## Attaching package: 'dplyr'
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
##
## > require("plyr")
## Loading required package: plyr
## -------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## -------------------------------------------------------------------------
##
## Attaching package: 'plyr'
##
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
##
## > require("tidyr")
## Loading required package: tidyr
##
## > rank <- data.frame(fromJSON(getURL(URLencode(gsub("\n",
## + " ", "129.152.144.84:5001/rest/native/?query=\"select uniquecarrier, dest, COUNT(dest .... [TRUNCATED]
##
## > tbl_df(rank)
## Source: local data frame [135 x 4]
##
## UNIQUECARRIER DEST TOTAL_RECORDS DEST_RANK
## 1 AA LAX 3400 1
## 2 AA MIA 1828 2
## 3 AA SFO 1801 3
## 4 AA SJU 1423 4
## 5 AA DFW 741 5
## 6 AA ORD 672 6
## 7 AA SEA 366 7
## 8 AA MCO 366 7
## 9 AA LAS 366 7
## 10 AA SAN 366 7
## .. ... ... ... ...
Crosstabs last_value()/max_value and difference
source("../01 SQL Crosstabs/lastval_dif.R", echo = TRUE)
##
## > require("RCurl")
##
## > require("jsonlite")
##
## > require("plyr")
##
## > require("dplyr")
##
## > require("tidyr")
##
## > lastval_dif <- data.frame(fromJSON(getURL(URLencode(gsub("\n",
## + " ", "129.152.144.84:5001/rest/native/?query=\n\"select distinct dest, uniquec .... [TRUNCATED]
##
## > tbl_df(lastval_dif)
## Source: local data frame [6,167 x 5]
##
## DEST UNIQUECARRIER AIRTIME MAX_AIRTIME ABERRATION
## 1 ACK B6 37 76 39
## 2 ACK B6 38 76 38
## 3 ACK B6 40 76 36
## 4 ACK B6 41 76 35
## 5 ACK B6 42 76 34
## 6 ACK B6 43 76 33
## 7 ACK B6 44 76 32
## 8 ACK B6 45 76 31
## 9 ACK B6 46 76 30
## 10 ACK B6 47 76 29
## .. ... ... ... ... ...
Crosstabs nth_value
source("../01 SQL Crosstabs/Nthval.R", echo = TRUE)
##
## > require("RCurl")
##
## > require("jsonlite")
##
## > require("plyr")
##
## > require("dplyr")
##
## > require("tidyr")
##
## > nthval <- data.frame(fromJSON(getURL(URLencode(gsub("\n",
## + " ", "129.152.144.84:5001/rest/native/?query=\"\nselect l.month, l.dest, l.c_dest, .... [TRUNCATED]
##
## > tbl_df(nthval)
## Source: local data frame [544 x 4]
##
## MONTH DEST C_DEST SECONDHIGHEST_CANCELLATION
## 1 1 ORD 22 19
## 2 1 BOS 19 19
## 3 1 SFO 18 19
## 4 1 LAX 14 19
## 5 1 CLT 11 19
## 6 1 DCA 10 19
## 7 1 BUF 9 19
## 8 1 PIT 7 19
## 9 1 BTV 7 19
## 10 1 TPA 6 19
## .. ... ... ... ...
Crosstabs cume_dist
source("../01 SQL Crosstabs/cumedist.R", echo = TRUE)
##
## > require("RCurl")
##
## > require("jsonlite")
##
## > require("plyr")
##
## > require("dplyr")
##
## > require("tidyr")
##
## > cumedist <- data.frame(fromJSON(getURL(URLencode(gsub("\n",
## + " ", "129.152.144.84:5001/rest/native/?query=\"select G.dest, G.avg_airtime, cu .... [TRUNCATED]
##
## > tbl_df(cumedist)
## Source: local data frame [68 x 3]
##
## DEST AVG_AIRTIME PERCENTILE
## 1 HPN 19.00000 0.01470588
## 2 LGA 22.50000 0.02941176
## 3 BDL 24.93792 0.04411765
## 4 PVD 31.60531 0.05882353
## 5 ALB 32.73851 0.07352941
## 6 PHL 33.92096 0.08823530
## 7 BWI 39.21727 0.10294118
## 8 BOS 39.60775 0.11764706
## 9 SYR 44.53689 0.13235295
## 10 BTV 45.50635 0.14705883
## .. ... ... ...